#-----------------------------------#
# Replication of:
# Seeking Opportunity in the Knowledge Economy: Moving Places, Moving Politics?
# Valentina Consiglio, Thomas Kurer

# File: 5_models_main_NUTS3

# Note: This file runs the main analysis of the paper. It takes the following steps:
# (1) Tests for model specification and provides Figure C.2. on Treatment Variation ()
# (2) Estimates the 'effect' of changes in the opportunity index on various political outcomes and exports tables
# (3) Estimates the 'effect' of any type of move on various political outcomes and exports Tables
# (4) Creates Figure 5 and exports a dataset with the respective  coefficient estimates

#-----------------------------------#
 

#-------------------------------------------------------------------------------# 
# Clear workspace
rm(list=ls())


#### Load Packages ####
library(pacman)
p_load(car, arm, lmtest, vars, ggplot2, psych, stargazer, dplyr, readxl, tidyverse, 
       haven, labelled, gtsummary, survey, srvyr, BMisc, questionr, plm, texreg, broom, fixest, modelsummary, bife, data.table, ggthemes)

#### Define relevant paths ####
data_path <- "data_created/"
graph_path <- "figures/" 
tables_path <- "tables/" 

#### Set run mode ####
data_room <- FALSE  
# Set TRUE for replication in the data room; code will run through fully; 
# if FALSE, only Figure 5 will be generated based on csv files in replication folder.

#-------------------------------------------------------------------------------# 
#### Import data #### 
if (data_room) { 
load((paste0(data_path, "df_2009_soepoi_v4.RData")))

#-----------------------------------------------------------------------------------------------------#
#### Additional data manipulation and creation of subsamples ####
#-----------------------------------------------------------------------------------------------------#

#-----------------------------------------------------------------------------------------------------#
##### Subsets of data for different years ####
#-----------------------------------------------------------------------------------------------------#

# df_2009: start year 2009

# df: standard, start year 2010
df <- df_2009 %>%
  filter(syear>2009)

# df_2014: start year 2014
df_2014 <- df_2009 %>%
  filter(syear>2013)

#-----------------------------------------------------------------------------------------------------#
##### Additional variables ####
#-----------------------------------------------------------------------------------------------------#

# Store datasets in list
datasets <- list(df_2009 = df_2009, df = df, df_2014 = df_2014)

# Apply data transformation to each dataset
datasets <- lapply(datasets, function(data) {
  data %>%
    group_by(pid) %>%
    mutate(
      # Create index counting number of observations 
      count_years = syear - first(syear) + 1,
      
      # New vars with 0 for first year and NA = NA
      upmove2 = ifelse(count_years== 1, 0, # take value NA if original takes N
                       ifelse(is.na(upmove), NA, upmove)), # take value 0 if first year in dataset 
      upmove_ub2 = ifelse(count_years== 1, 0,
                          ifelse(is.na(upmove_ub), NA, upmove_ub)), 
      PC1_upmove_ub2 = ifelse(count_years== 1, 0,
                          ifelse(is.na(PC1_upmove_ub), NA, PC1_upmove_ub)), 
      PC2_upmove_ub2 = ifelse(count_years== 1, 0,
                              ifelse(is.na(PC2_upmove_ub), NA, PC2_upmove_ub)), 
      
      downmove2 = ifelse(count_years== 1, 0, 
                         ifelse(is.na(downmove), NA, downmove)), 
      downmove_ub2 = ifelse(count_years== 1, 0, 
                            ifelse(is.na(downmove_ub), NA,downmove_ub)), 
      PC1_downmove_ub2 = ifelse(count_years== 1, 0, 
                            ifelse(is.na(PC1_downmove_ub), NA,PC1_downmove_ub)), 
      PC2_downmove_ub2 = ifelse(count_years== 1, 0, 
                            ifelse(is.na(PC2_downmove_ub), NA,PC2_downmove_ub)), 
      
      moved_krs2 = ifelse(count_years== 1, 0, 
                          ifelse(is.na(moved_krs), NA, moved_krs)),
      moved_krs_bal2 = ifelse(count_years == 1, 0, 
                              ifelse(is.na(moved_krs_bal), NA, moved_krs_bal)),
      occ_chg_move = ifelse(moved_krs2 == 1 & occ_chg == 1, 1, 
                            ifelse(moved_krs2 == 1 & is.na(occ_chg), 0,
                                   0)),
      
      # Cumulative sum (count move variable, based on variable that is 0 in first year)
      count_moved_krs2 = cumsum(ifelse(is.na(moved_krs2), 0, moved_krs2)),
      
      
      
      # Total up / down
      total_up2 = sum(replace_na(upmove2, 0)),
      total_down2 = sum(replace_na(downmove2, 0)),
      total_up_ub2 = sum(replace_na(upmove_ub2, 0)),
      total_down_ub2 = sum(replace_na(downmove_ub2, 0)),
      
      PC1_total_up_ub2 = sum(replace_na(PC1_upmove_ub2, 0)),
      PC1_total_down_ub2 = sum(replace_na(PC1_downmove_ub2, 0)),
      
      PC2_total_up_ub2 = sum(replace_na(PC2_upmove_ub2, 0)),
      PC2_total_down_ub2 = sum(replace_na(PC2_downmove_ub2, 0)),
                      
     # Flag multiple movers
      mm2 = ifelse(total_up2 + total_down2 > 1, 1, ifelse(total_up2 + total_down2 %in% c(0,1), 0, NA)),
      mm_ub2 = ifelse(total_up_ub2 + total_down_ub2 > 1, 1, ifelse(total_up_ub2 + total_down_ub2 %in% c(0,1), 0, NA)),
     
     # Recode opportunity index for symmetry analysis (downmoves)
     opportunity_pc01_wn_rc = opportunity_pc01_wn * (-1),
     pc1_01_wn_rc = pc1_01_wn * (-1),
     pc2_01_wn_rc = pc2_01_wn * (-1)
     
    ) %>%
    ungroup()
})


# Transform datasets into original datasets
df_2009 <- datasets$df_2009
df <- datasets$df
df_2014 <- datasets$df_2014


#-----------------------------------------------------------------------------------------------------#
##### Subsamples upmove/downmove ####
#-----------------------------------------------------------------------------------------------------#

# Keep only upward moves in sample 
# -- based on unbalanced vars
datasets_upmove_ub2 <- lapply(datasets, function(data) {
  data %>%
    filter((total_up_ub2 == 1 | total_up_ub2 == 0) & mm_ub2 == 0 & total_down_ub2 == 0)
  
})

# Store datasets from list
df_2009_upmove_ub2 <- datasets_upmove_ub2$df_2009
df_upmove_ub2 <- datasets_upmove_ub2$df
df_2014_upmove_ub2 <- datasets_upmove_ub2$df_2014

rm(datasets_upmove_ub2) # remove list with datasets


# Keep only downward moves in sample (based on balanced vars)
datasets_downmove_ub2 <- lapply(datasets, function(data) {
  data %>%
    filter((total_down_ub2 == 1 | total_down_ub2 == 0) & mm_ub2 == 0 & total_up_ub2 == 0)
  
})

# Store datasets from list
df_2009_downmove_ub2 <- datasets_downmove_ub2$df_2009
df_downmove_ub2 <- datasets_downmove_ub2$df
df_2014_downmove_ub2 <- datasets_downmove_ub2$df_2014

# PC1
# -- based on unbalanced vars
datasets_PC1_upmove_ub2 <- lapply(datasets, function(data) {
  data %>%
    filter((PC1_total_up_ub2 == 1 | PC1_total_up_ub2 == 0) & mm_ub2 == 0 & PC1_total_down_ub2 == 0)
  
})

# Store datasets from list
df_2009_PC1_upmove_ub2 <- datasets_PC1_upmove_ub2$df_2009
df_PC1_upmove_ub2 <- datasets_PC1_upmove_ub2$df
df_2014_PC1_upmove_ub2 <- datasets_PC1_upmove_ub2$df_2014

rm(datasets_PC1_upmove_ub2) # remove list with datasets


# Keep only downward moves in sample (based on balanced vars)
datasets_PC1_downmove_ub2 <- lapply(datasets, function(data) {
  data %>%
    filter((PC1_total_down_ub2 == 1 | PC1_total_down_ub2 == 0) & mm_ub2 == 0 & PC1_total_up_ub2 == 0)
  
})

# Store datasets from list
df_2009_PC1_downmove_ub2 <- datasets_PC1_downmove_ub2$df_2009
df_PC1_downmove_ub2 <- datasets_PC1_downmove_ub2$df
df_2014_PC1_downmove_ub2 <- datasets_PC1_downmove_ub2$df_2014


# PC2
# -- based on unbalanced vars
datasets_PC2_upmove_ub2 <- lapply(datasets, function(data) {
  data %>%
    filter((PC2_total_up_ub2 == 1 | PC2_total_up_ub2 == 0) & mm_ub2 == 0 & PC2_total_down_ub2 == 0)
  
})

# Store datasets from list
df_2009_PC2_upmove_ub2 <- datasets_PC2_upmove_ub2$df_2009
df_PC2_upmove_ub2 <- datasets_PC2_upmove_ub2$df
df_2014_PC2_upmove_ub2 <- datasets_PC2_upmove_ub2$df_2014

rm(datasets_PC2_upmove_ub2) # remove list with datasets


# Keep only downward moves in sample (based on balanced vars)
datasets_PC2_downmove_ub2 <- lapply(datasets, function(data) {
  data %>%
    filter((PC2_total_down_ub2 == 1 | PC2_total_down_ub2 == 0) & mm_ub2 == 0 & PC2_total_up_ub2 == 0)
  
})

# Store datasets from list
df_2009_PC2_downmove_ub2 <- datasets_PC2_downmove_ub2$df_2009
df_PC2_downmove_ub2 <- datasets_PC2_downmove_ub2$df
df_2014_PC2_downmove_ub2 <- datasets_PC2_downmove_ub2$df_2014

rm(datasets_downmove_ub2, datasets_PC1_downmove_ub2, datasets_PC2_downmove_ub2, datasets) # remove lists with datasets

#-----------------------------------------------------------------------------------------------------#
##### Subsamples without multiple movers ####
#-----------------------------------------------------------------------------------------------------#

## Dataset without multiple movers ##

# Unbalanced
df_womm <- df %>%
  filter(mm_ub2 == 0) 

df_2009_womm <- df_2009 %>%
  filter(mm_ub2 == 0)

df_2014_womm <- df_2014 %>%
  filter(mm_ub2 == 0)

}

#-------------------------------------------------------------------------------# 
#### 1 | Testing for model and specification + descriptives index change ####
#-------------------------------------------------------------------------------# 

if (data_room) { 

## Function to run specification tests ##
run_spec_tests <- function(dv, df_used, dv_label = NULL) {
  if (is.null(dv_label)) dv_label <- dv
  
  # Construct formula
  formula_str <- paste0(dv, " ~ opportunity_pc01_wn + age + as.factor(region)")
  formula <- as.formula(formula_str)
  
  # Individual fixed effects
  individual <- tryCatch(plm(formula, data = df_used, index = c("pid", "syear"),
                             model = "within", effect = "individual"),
                         error = function(e) e)
  
  # Two-way fixed effects
  twoway <- tryCatch(plm(formula, data = df_used, index = c("pid", "syear"),
                         model = "within", effect = "twoway"),
                     error = function(e) e)
  
  # Fixed effects (for Hausman)
  within_model <- tryCatch(plm(formula, data = df_used, index = c("pid", "syear"),
                               model = "within", effect = "twoway"),
                           error = function(e) e)
  
  # Random effects
  random_model <- tryCatch(plm(formula, data = df_used, index = c("pid", "syear"),
                               model = "random", effect = "twoway"),
                           error = function(e) e)
  
  # Run tests
  pf_res <- tryCatch(pFtest(twoway, individual), error = function(e) e)
  ph_res <- tryCatch(phtest(within_model, random_model), error = function(e) e)
  
  # Output
  cat("\n--------------------------------------------------\n")
  cat("Model Specification Tests for:", dv_label, "\n")
  
  if (inherits(pf_res, "htest")) {
    cat("pF test (H0: no time FE needed): p-value =", format.pval(pf_res$p.value, digits = 4), "\n")
  } else {
    cat("pF test failed:", pf_res$message, "\n")
  }
  
  if (inherits(ph_res, "htest")) {
    cat("Hausman test (H0: RE is better): p-value =", format.pval(ph_res$p.value, digits = 4), "\n")
  } else {
    cat("Hausman test failed:", ph_res$message, "\n")
  }
}

## Define dependent variables and datasets ##

# Political integration/orientation outcomes (non-interrupted DVs)
dv_list1 <- list(
  #"ft_volunt_d"       = df, 
  #"ft_localpol_d"     = df, 
  #"votefedel_lead"    = df_2009, 
  "partyleand2"       = df,
  "partyleanint"      = df,
  #"polor"             = df_2009, 
  "centre_left2"      = df,
  "centre_right2"     = df
)

# Party ID outcomes
dv_list2 <- list(
  "spd_co2"       = df,
  "cducsu_co2"    = df,
  "greens_co2"    = df,
  "fdp_co2"       = df,
  "linke_co2"     = df,
  "afd_co2"       = df_2014
)

## Run specification tests for each DV ##

cat("\n==== Running Specification Tests for Political Integration & Orientation ====\n")
for (dv in names(dv_list1)) {
  run_spec_tests(dv, dv_list1[[dv]])
}

cat("\n==== Running Specification Tests for Party Identification ====\n")
for (dv in names(dv_list2)) {
  run_spec_tests(dv, dv_list2[[dv]])
}
}

if (data_room) { 
#-------------------------------------------------------------------------------# 
# Descriptive plot index change

df_indexchange <- df %>%
  select(pid, syear, moved_krs, opportunity_pc01_wn, oichange_ub) %>%
  group_by(pid) %>%
  mutate(mover = sum(moved_krs)) %>%
  filter(mover > 0 & moved_krs == 1)

# Bins 70
  ggplot(data = df_indexchange) +
    geom_histogram(aes(x = oichange_ub),
                   alpha = 0.4, bins = 70, color = "black", linewidth = 0.2,
                   position = 'identity') +
    labs(title = "", x = "Opportunity Index Change", y = "Frequency") +
    theme_minimal()

 # ggsave((paste0(graph_path, "gghist_OI_change_bins70.png")), height = 5, width = 7 )

  # Bins 50
  ggplot(data = df_indexchange) +
    geom_histogram(aes(x = oichange_ub),
                   alpha = 0.4, bins = 50, color = "black", linewidth = 0.2,
                   position = 'identity') +
    labs(title = "", x = "Opportunity Index Change", y = "Frequency") +
    theme_minimal()
  
  ggsave((paste0(graph_path, "Appendix_Figure_C2.png")), height = 5, width = 7, dpi = 600)
}

#-------------------------------------------------------------------------------# 
#### 2 | FE-Models with Opportunity Index as IV  ####
#-------------------------------------------------------------------------------# 

if (data_room) { 

## Texreg input 
vec_gof_names <- c("Num. obs." = "N",
                   "Num. groups: pid" = "N individuals",
                   "Num. groups: syear" = "N years")



#-------------------------------------------------------------------------------# 
##### 2.1 | Political integration and orientation #####
#-------------------------------------------------------------------------------# 

fe_main_polint_polor2 <-  list(
  "Volunteering (Yes/No)" = feols(ft_volunt_d ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) 
                                  | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  
  "Local political activism (Yes/No)" = feols(ft_localpol_d ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) 
                                              | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  
  "Voting in federal election (Yes/No)" = feols(votefedel_lead ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype)
                                                | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  
  "Party leaning (Yes/No)" = feols(partyleand2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype)
                                   | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  
  "Party leaning intensity (0-5)" = feols(partyleanint ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype)
                                          | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  
  "Political orientation (0 L – 10 R)" = feols(polor ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) 
                                                      | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  
  "Centre left party ID" = feols(centre_left2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) 
                                             | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  
  "Centre right party ID" = feols(centre_right2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) |
                                                pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df)
)


texreg(fe_main_polint_polor2)


texreg(fe_main_polint_polor2, digits = 3,
       label = "tab:fe_main_polint_polor2",
       caption = "Opportunity moves and political integration and orientation",
       custom.coef.map = list('opportunity_pc01_wn' = 'Opportunity Index'),
       custom.gof.rows = list("Individual fixed-effects" = c("YES", "YES", "YES", "YES", "YES", "YES", "YES", "YES"), "Time fixed-effect" = c("YES", "YES", "YES", "YES", "YES", "YES", "YES", "YES")),
       stars = c(0.01, 0.05, 0.1), include.rsquared = F, include.adjrs = F,
       custom.gof.names = vec_gof_names,                                             
       custom.note = paste("%stars. All models include age group, education group, and household type as control variables; Standard errors are clustered at the Kreis-level. Source: SOEP v.37, 2009/10-2020."),
       file = paste0(tables_path, "Table_3.tex"))


#--------------------------------------------------------------------------------------------#

#-------------------------------------------------------------------------------# 
##### 2.2 | Party identification #####
#-------------------------------------------------------------------------------# 

# -all those with 0 party identification are in the reference category of the dummies

fe_main_partid2 <-  list(
  "SPD " = feols(spd_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                   pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "CDU/CSU " = feols(cducsu_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                       pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "Gruene " = feols(greens_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                      pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "FDP " = feols(fdp_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                   pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "Linke" = feols(linke_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                    pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "AfD " = feols(afd_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                   pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2014)
)

texreg(fe_main_partid2)


texreg(fe_main_partid2, digits = 3,
       label = "tab:fe_main_partid2",
       caption = "Opportunity moves and party identification",
       custom.coef.map = list('opportunity_pc01_wn' = 'Opportunity Index'),
       custom.gof.rows = list("Individual fixed-effects" = c("YES", "YES", "YES", "YES", "YES", "YES"), "Time fixed-effect" = c("YES", "YES", "YES", "YES", "YES", "YES")),
       stars = c(0.01, 0.05, 0.1), include.rsquared = F, include.adjrs = F,
       custom.gof.names = vec_gof_names,                                            
       custom.note = paste("%stars. Party leanings include coalitions in the political direction of the respective party (i.e., including left coalitions for SPD, Gruene, and Linke; right coalitions for CDU/CSU, FDP, and AfD). All models include age group, education group, and household type as control variables; Standard errors are clustered at the Kreis-level. Source: SOEP v.37, 2010-2020."),
       file = paste0(tables_path, "Table_4.tex"))

#--------------------------------------------------------------------------------------------#

#-------------------------------------------------------------------------------# 
#### 3 | FE-Models with Kreis-level Move as IV (count) ####
#-------------------------------------------------------------------------------# 

#-------------------------------------------------------------------------------# 
##### 3.1 | Political integration and orientation #####
#-------------------------------------------------------------------------------# 

# unbalanced

fe_moved_ub_polint_polor2 <-  list(
  "Volunteering (Yes/No)" = feols(ft_volunt_d ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ) + as.factor(hhtype) | 
                                    pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "Local political activism (Yes/No)" = feols(ft_localpol_d ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ) + as.factor(hhtype) | 
                                                pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "Voting in federal election (Yes/No)" = feols(votefedel_lead ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                                                  syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  
  
  "Party leaning (Yes/No)" = feols(partyleand2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ) + as.factor(hhtype)| 
                                     pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "Party leaning intensity (0-5)" = feols(partyleanint ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ) + as.factor(hhtype)| 
                                            pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "Political orientation (0 L – 10 R)" = feols(polor ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                                                 pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  "Centre left party ID" = feols(centre_left2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                                   pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "Centre right party ID" = feols(centre_right2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                                    pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df)
)

texreg(fe_moved_ub_polint_polor2)



#-------------------------------------------------------------------------------# 
##### 3.2 | Party identification #####
#-------------------------------------------------------------------------------# 

# With alternative coding where all those with 0 party identification are in the reference category of the dummies

# Unbalanced

fe_moved_ub_partid2 <-  list(
  "SPD " = feols(spd_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                   pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "CDU/CSU " = feols(cducsu_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                       pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "Gruene " = feols(greens_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                      pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "FDP " = feols(fdp_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                   pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "Linke" = feols(linke_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                    pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  "AfD " = feols(afd_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | 
                   pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2014)
)

texreg(fe_moved_ub_partid2)


#------------------------------------------------------------------------------# 
#### 4 | Data for plots ####
#-------------------------------------------------------------------------------# 

#-------------------------------------------------------------------------------# 
##### 4.1 Main analysis and moved only ######
#-------------------------------------------------------------------------------# 

## Table main analysis ##
# Opportunity Index 

# Write function to extract statistics
tidy_feols <- function(model, dv) {
  tidy_model <- tidy(model)
  tidy_model <- head(tidy_model, 1)
  tidy_model$DV <- dv
  return(tidy_model)
}

# List of models

models_list_oi <- list(
  ft_volunt_d = list(formula = ft_volunt_d ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  ft_localpol_d = list(formula = ft_localpol_d ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  votefedel_lead = list(formula = votefedel_lead ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  partyleand2 = list(formula = partyleand2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  partyleanint = list(formula = partyleanint ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  polor = list(formula = polor ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  centre_left2 = list(formula = centre_left2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  centre_right2 = list(formula = centre_right2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  spd_co2 = list(formula = spd_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  cducsu_co2 = list(formula = cducsu_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  greens_co2 = list(formula = greens_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  fdp_co2 = list(formula = fdp_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  linke_co2 = list(formula = linke_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  afd_co2 = list(formula = afd_co2 ~ opportunity_pc01_wn + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2014)
)

# Apply function to each model and combine results
models_oi_tidy <- do.call(rbind, lapply(names(models_list_oi), function(dv) {
  model_info <- models_list_oi[[dv]]
  model <- feols(model_info$formula, data = model_info$data)
  tidy_feols(model, dv)
}))

models_oi_tidy$iv <- "opportunity_pc01_wn"


# Moved 

# List of models

models_list_moved_krs2 <- list(
  ft_volunt_d = list(formula = ft_volunt_d ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  ft_localpol_d = list(formula = ft_localpol_d ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  votefedel_lead = list(formula = votefedel_lead ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  partyleand2 = list(formula = partyleand2 ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  partyleanint = list(formula = partyleanint ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  polor = list(formula = polor ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  centre_left2 = list(formula = centre_left2 ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  centre_right2 = list(formula = centre_right2 ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  spd_co2 = list(formula = spd_co2 ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  cducsu_co2 = list(formula = cducsu_co2 ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  greens_co2 = list(formula = greens_co2 ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  fdp_co2 = list(formula = fdp_co2 ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  linke_co2 = list(formula = linke_co2 ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  afd_co2 = list(formula = afd_co2 ~ moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2014)
)

# Apply function to each model and combine results
models_moved_krs2_tidy <- do.call(rbind, lapply(names(models_list_moved_krs2), function(dv) {
  model_info <- models_list_moved_krs2[[dv]]
  model <- feols(model_info$formula, data = model_info$data)
  tidy_feols(model, dv)
}))

models_moved_krs2_tidy$iv <- "moved_krs2"



#-------------------------------------------------------------------------------# 
##### 4.2 Main analysis and moved only ######
#-------------------------------------------------------------------------------# 

# List of models

models_list_count_moved_krs2 <- list(
  ft_volunt_d = list(formula = ft_volunt_d ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  ft_localpol_d = list(formula = ft_localpol_d ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  votefedel_lead = list(formula = votefedel_lead ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  partyleand2 = list(formula = partyleand2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  partyleanint = list(formula = partyleanint ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  polor = list(formula = polor ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2009),
  centre_left2 = list(formula = centre_left2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  centre_right2 = list(formula = centre_right2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  spd_co2 = list(formula = spd_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  cducsu_co2 = list(formula = cducsu_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  greens_co2 = list(formula = greens_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  fdp_co2 = list(formula = fdp_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  linke_co2 = list(formula = linke_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df),
  afd_co2 = list(formula = afd_co2 ~ count_moved_krs2 + as.factor(age_grp3) + as.factor(educ_h) + as.factor(hhtype) | pid + syear, panel.id = ~pid + syear, cluster = "krs", data = df_2014)
)

# Apply function to each model and combine results
models_count_moved_krs2_tidy <- do.call(rbind, lapply(names(models_list_count_moved_krs2), function(dv) {
  model_info <- models_list_count_moved_krs2[[dv]]
  model <- feols(model_info$formula, data = model_info$data)
  tidy_feols(model, dv)
}))

models_count_moved_krs2_tidy$iv <- "count_moved_krs2"



# Bind models tidy and models tidy moved
models_tidy_oi_moved <- rbind(models_oi_tidy, models_moved_krs2_tidy, models_count_moved_krs2_tidy)
models_tidy_oi_moved

write.csv(models_tidy_oi_moved, paste0(data_path, "5_models_main_NUTS3_opportunityvsanymove.csv"))
}

#-------------------------------------------------------------------------------# 
#### 5 | Model Plot - Opportunity vs any move ####
#-------------------------------------------------------------------------------# 


models_tidy_oi_moved <- read.csv(file = "data_created/5_models_main_NUTS3_opportunityvsanymove.csv")

# Rename
models_tidy_oi_moved$se <- models_tidy_oi_moved$"std.error"
models_tidy_oi_moved$pvalue <- models_tidy_oi_moved$"p.value"

# Select variables
models_tidy_oi_moved_adj <- models_tidy_oi_moved %>%
  select(term, DV, estimate, se, pvalue) %>%
  filter(term != "moved_krs2")

# Rescale polor
models_tidy_oi_moved_adj$estimate[models_tidy_oi_moved_adj$DV == "polor"] <- models_tidy_oi_moved_adj$estimate[models_tidy_oi_moved_adj$DV == "polor"] / 10 
models_tidy_oi_moved_adj$se[models_tidy_oi_moved_adj$DV == "polor"] <- models_tidy_oi_moved_adj$se[models_tidy_oi_moved_adj$DV == "polor"] / 10  

# Calculate 95%CIs
models_tidy_oi_moved_adj$CI_l95 <- models_tidy_oi_moved_adj$estimate - (1.96 * models_tidy_oi_moved_adj$se)
models_tidy_oi_moved_adj$CI_u95 <- models_tidy_oi_moved_adj$estimate + (1.96 * models_tidy_oi_moved_adj$se)



models_tidy_oi_moved_adj$varlab <- c("Volunteering", 
                   "Local political activism", 
                   "Voting in federal election",
                   
                   "Party leaning",
                   "Party leaning intensity (0-5)",
                   
                   "Left-right pol. orientation",
                   
                   "Centre left party ID",
                   "Centre right party ID",
                   
                   "SPD and left coalition",
                   "CDU/CSU and right coalition",
                   "Gruene and left coalition",
                   "FDP and right coalition",
                   "Linke and left coalition",
                   "AfD and right coalition",
                   "Volunteering", 
                   "Local political activism", 
                   "Voting in federal election",
                   
                   "Party leaning",
                   "Party leaning intensity (0-5)",
                   
                   "Left-right pol. orientation",
                   
                   "Centre left party ID",
                   "Centre right party ID",
                   
                   "SPD and left coalition",
                   "CDU/CSU and right coalition",
                   "Gruene and left coalition",
                   "FDP and right coalition",
                   "Linke and left coalition",
                   "AfD and right coalition")

models_tidy_oi_moved_adj$group <- c("opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  "opportunity move", 
                  
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move",
                  "any move")

# Drop intensity
models_tidy_oi_moved_adj2 <- models_tidy_oi_moved_adj %>%
  filter(DV != "partyleanint")



# Generate plot

gg_polintpolor <- ggplot(data = models_tidy_oi_moved_adj2, 
                         mapping = aes(y = varlab,
                                       x = estimate,
                                       color = group,
                                       shape = group)) +
  geom_errorbar(aes(xmin = CI_l95, xmax = CI_u95), 
                lwd = 0.5, width = 0, position = position_dodge(width = 0.5)) + 
  geom_point(size = 2, position = position_dodge(width = 0.5)) +
  scale_color_manual(values = c("opportunity move" = "black", "any move" = "grey50")) + 
  scale_shape_manual(values = c("opportunity move" = 16, "any move" = 17)) +  # 16 = circle, 17 = triangle
  geom_vline(aes(xintercept = 0), col = 'darkgrey', size = 0.2) +
  labs(
    x = "Effect on political integration and orientation", 
    y = element_blank()) +
  theme_economist_white(gray_bg = FALSE) +
  theme(panel.grid.major = element_line(size = 0.3, linetype = "dotted"),
        axis.title.x = element_text(margin = margin(t = 5, r = 0, b = 0, l = 0)),
        legend.title = element_blank()) +
  scale_y_discrete(labels = function(x) str_wrap(x, width = 30),
                   position = "right",
                   limits = c("AfD and right coalition",
                              "Linke and left coalition",
                              "FDP and right coalition",
                              "Gruene and left coalition",
                              "CDU/CSU and right coalition",
                              "SPD and left coalition",
                              "Centre right party ID",
                              "Centre left party ID",
                              "Left-right pol. orientation",
                              "Party leaning",
                              "Voting in federal election",
                              "Local political activism",  
                              "Volunteering"))

gg_polintpolor


ggsave("figures/Figure_5.png",
       plot = gg_polintpolor, 
       dpi = 600,
       height = 7, width = 7)


#-------------------------------------------------------------------------------# 
# Remove all model lists from storage
rm(list = ls(pattern = "^fe_"))

#-------------------------------------------------------------------------------# 
#-------------------------------------------------------------------------------# 

